library(ggplot2) # (Wickham, 2016)
library(tidyr) # (Wickham and Henry, 2020)
library(dplyr) # (Wickham et al., 2020)
library(reshape2) # (Wickham, 2007)
library(cowplot) # (Wilke, 2019)
library(patchwork) # (Pederson, 2020)
library(viridis) # (Garnier, 2018)
library(hexbin)
We conducted these analyses using the following computing environment:
print(version)
## _
## platform x86_64-apple-darwin15.6.0
## arch x86_64
## os darwin15.6.0
## system x86_64, darwin15.6.0
## status
## major 3
## minor 6.2
## year 2019
## month 12
## day 12
## svn rev 77560
## language R
## version.string R version 3.6.2 (2019-12-12)
## nickname Dark and Stormy Night
data_path <- "./data/agg_data.csv"
agg_data <- read.csv(data_path, na.strings="NONE")
agg_data$BIT_FLIP_PROB <- as.factor(agg_data$BIT_FLIP_PROB)
agg_data$DRIFT <- agg_data$TOURNAMENT_SIZE==1
chg_rate_label <- function(mag, interval, drift) {
if (drift) { return("drift") }
else if (interval == 0) { return("0") }
else { return(paste(mag, interval, sep="/")) }
}
agg_data$chg_rate_label <- factor(mapply(chg_rate_label,
agg_data$CHANGE_MAGNITUDE,
agg_data$CHANGE_FREQUENCY,
agg_data$DRIFT),
levels=c("drift", "0", "1/256", "1/128",
"1/64", "1/32", "1/16", "1/8",
"1/4", "1/2", "1/1", "2/1",
"4/1", "8/1", "16/1", "32/1",
"64/1", "128/1", "256/1",
"512/1", "1024/1", "2048/1",
"4096/1"))
theme_set(theme_cowplot())
data_nk_phase0 <-
filter(agg_data, GRADIENT_MODEL==0 & evo_phase == 0 & update==50000)
data_nk_phase1 <-
filter(agg_data, GRADIENT_MODEL==0 & evo_phase == 1 & update==60000)
data_gradient_phase0 <-
filter(agg_data, GRADIENT_MODEL==1 & evo_phase == 0 & update==50000)
data_gradient_phase1 <-
filter(agg_data, GRADIENT_MODEL==1 & evo_phase == 1 & update==60000)
ggplot(data_gradient_phase0,
aes(x=BIT_FLIP_PROB, y=fitness, color=BIT_FLIP_PROB)) +
geom_boxplot() +
xlab("Bit Flip Rate") +
scale_y_continuous(name="Fitness") +
facet_wrap(~ chg_rate_label) +
ggtitle("Gradient Fitness Model") +
theme(legend.position="none",
axis.text.x=element_text(angle=90))
ggplot(data_nk_phase0,
aes(x=BIT_FLIP_PROB, y=fitness, color=BIT_FLIP_PROB)) +
geom_boxplot() +
xlab("Bit Flip Rate") +
scale_y_continuous(name="Fitness") +
facet_wrap(~ chg_rate_label) +
ggtitle("Gradient Fitness Model") +
theme(legend.position="none",
axis.text.x=element_text(angle=90))
ggplot(data_gradient_phase0,
aes(x=BIT_FLIP_PROB, y=coding_sites, color=BIT_FLIP_PROB)) +
geom_boxplot() +
xlab("Bit Flip Rate") +
scale_y_continuous(name="Coding Sites (in best organism)",
limits=c(0, 130),
breaks=seq(0, 130, 20)) +
facet_wrap(~ chg_rate_label) +
ggtitle("Gradient Fitness Model") +
theme(legend.position="none",
axis.text.x=element_text(angle=90))
chg_rates <- levels(data_gradient_phase0$chg_rate_label)
mut_rates <- levels(data_gradient_phase0$BIT_FLIP_PROB)
for (chg_rate in chg_rates) {
df <- filter(data_gradient_phase0, chg_rate_label==chg_rate)
if (length(df$update) == 0) { next(); }
print(paste("===== Change rate: ", chg_rate, " =====", sep=""))
for (mut_rate in mut_rates) {
mr <- filter(df, BIT_FLIP_PROB==mut_rate)
if (length(mr$update) == 0) { next(); }
med <- median(mr$coding_sites)
print(paste("Median for", mut_rate, ":", med))
}
kt <- kruskal.test(formula = coding_sites ~ BIT_FLIP_PROB, data=df)
print(kt)
if (kt$p.value <= 0.05) {
wt <- pairwise.wilcox.test(x=df$coding_sites,
g=df$BIT_FLIP_PROB,
exact=FALSE,
p.adjust.method = "bonferroni")
print(wt)
}
}
## [1] "===== Change rate: drift ====="
## [1] "Median for 3e-04 : 81.5"
## [1] "Median for 0.001 : 75.5"
## [1] "Median for 0.003 : 69.5"
## [1] "Median for 0.01 : 64.5"
## [1] "Median for 0.03 : 71"
## [1] "Median for 0.1 : 75.5"
##
## Kruskal-Wallis rank sum test
##
## data: coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 9.8694, df = 5, p-value = 0.07902
##
## [1] "===== Change rate: 0 ====="
## [1] "Median for 3e-04 : 78"
## [1] "Median for 0.001 : 76"
## [1] "Median for 0.003 : 76"
## [1] "Median for 0.01 : 72.5"
## [1] "Median for 0.03 : 67"
## [1] "Median for 0.1 : 28"
##
## Kruskal-Wallis rank sum test
##
## data: coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 340.2, df = 5, p-value < 2.2e-16
##
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: df$coding_sites and df$BIT_FLIP_PROB
##
## 3e-04 0.001 0.003 0.01 0.03
## 0.001 1.0000 - - - -
## 0.003 0.1582 1.0000 - - -
## 0.01 5.7e-07 0.0005 0.0622 - -
## 0.03 < 2e-16 < 2e-16 7.7e-13 3.8e-08 -
## 0.1 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
##
## P value adjustment method: bonferroni
## [1] "===== Change rate: 1/128 ====="
## [1] "Median for 3e-04 : 124"
## [1] "Median for 0.001 : 123"
## [1] "Median for 0.003 : 119"
## [1] "Median for 0.01 : 110"
## [1] "Median for 0.03 : 89"
## [1] "Median for 0.1 : 39"
##
## Kruskal-Wallis rank sum test
##
## data: coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 476.71, df = 5, p-value < 2.2e-16
##
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: df$coding_sites and df$BIT_FLIP_PROB
##
## 3e-04 0.001 0.003 0.01 0.03
## 0.001 1 - - - -
## 0.003 1.1e-09 5.9e-08 - - -
## 0.01 < 2e-16 < 2e-16 2.3e-12 - -
## 0.03 < 2e-16 < 2e-16 < 2e-16 < 2e-16 -
## 0.1 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
##
## P value adjustment method: bonferroni
## [1] "===== Change rate: 1/4 ====="
## [1] "Median for 3e-04 : 127"
## [1] "Median for 0.001 : 128"
## [1] "Median for 0.003 : 128"
## [1] "Median for 0.01 : 127"
## [1] "Median for 0.03 : 115.5"
## [1] "Median for 0.1 : 65"
##
## Kruskal-Wallis rank sum test
##
## data: coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 400.32, df = 5, p-value < 2.2e-16
##
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: df$coding_sites and df$BIT_FLIP_PROB
##
## 3e-04 0.001 0.003 0.01 0.03
## 0.001 0.685 - - - -
## 0.003 0.012 1.000 - - -
## 0.01 1.000 1.000 0.023 - -
## 0.03 <2e-16 <2e-16 <2e-16 <2e-16 -
## 0.1 <2e-16 <2e-16 <2e-16 <2e-16 <2e-16
##
## P value adjustment method: bonferroni
## [1] "===== Change rate: 4/1 ====="
## [1] "Median for 3e-04 : 116"
## [1] "Median for 0.001 : 111.5"
## [1] "Median for 0.003 : 111"
## [1] "Median for 0.01 : 104"
## [1] "Median for 0.03 : 104"
## [1] "Median for 0.1 : 97"
##
## Kruskal-Wallis rank sum test
##
## data: coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 125.36, df = 5, p-value < 2.2e-16
##
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: df$coding_sites and df$BIT_FLIP_PROB
##
## 3e-04 0.001 0.003 0.01 0.03
## 0.001 0.02862 - - - -
## 0.003 0.00794 1.00000 - - -
## 0.01 7.6e-09 0.00016 0.02481 - -
## 0.03 7.4e-10 5.5e-05 0.01170 1.00000 -
## 0.1 < 2e-16 4.4e-14 1.0e-09 0.00279 0.00486
##
## P value adjustment method: bonferroni
ggplot(data_nk_phase0,
aes(x=BIT_FLIP_PROB, y=coding_sites, color=BIT_FLIP_PROB)) +
geom_boxplot() +
xlab("Bit Flip Rate") +
scale_y_continuous(name="Coding Sites (in best organism)",
limits=c(0, 130),
breaks=seq(0, 130, 20)) +
facet_wrap(~ chg_rate_label) +
ggtitle("NK Fitness Model") +
theme(legend.position="none",
axis.text.x=element_text(angle=90))
chg_rates <- levels(data_nk_phase0$chg_rate_label)
mut_rates <- levels(data_nk_phase0$BIT_FLIP_PROB)
for (chg_rate in chg_rates) {
df <- filter(data_nk_phase0, chg_rate_label==chg_rate)
if (length(df$update) == 0) { next(); }
print(paste("===== Change rate: ", chg_rate, " =====", sep=""))
for (mut_rate in mut_rates) {
mr <- filter(df, BIT_FLIP_PROB==mut_rate)
if (length(mr$update) == 0) { next(); }
med <- median(mr$coding_sites)
print(paste("Median for", mut_rate, ":", med))
}
kt <- kruskal.test(formula = coding_sites ~ BIT_FLIP_PROB, data=df)
print(kt)
if (kt$p.value <= 0.05) {
wt <- pairwise.wilcox.test(x=df$coding_sites,
g=df$BIT_FLIP_PROB,
exact=FALSE,
p.adjust.method = "bonferroni")
print(wt)
}
}
## [1] "===== Change rate: drift ====="
## [1] "Median for 3e-04 : 77.5"
## [1] "Median for 0.001 : 78.5"
## [1] "Median for 0.003 : 76.5"
## [1] "Median for 0.01 : 69.5"
## [1] "Median for 0.03 : 72"
## [1] "Median for 0.1 : 69"
##
## Kruskal-Wallis rank sum test
##
## data: coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 2.3393, df = 5, p-value = 0.8005
##
## [1] "===== Change rate: 0 ====="
## [1] "Median for 3e-04 : 77"
## [1] "Median for 0.001 : 76"
## [1] "Median for 0.003 : 75.5"
## [1] "Median for 0.01 : 73"
## [1] "Median for 0.03 : 60"
## [1] "Median for 0.1 : 21"
##
## Kruskal-Wallis rank sum test
##
## data: coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 404.75, df = 5, p-value < 2.2e-16
##
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: df$coding_sites and df$BIT_FLIP_PROB
##
## 3e-04 0.001 0.003 0.01 0.03
## 0.001 1.0000 - - - -
## 0.003 1.0000 1.0000 - - -
## 0.01 0.0093 0.1669 0.3626 - -
## 0.03 <2e-16 <2e-16 <2e-16 <2e-16 -
## 0.1 <2e-16 <2e-16 <2e-16 <2e-16 <2e-16
##
## P value adjustment method: bonferroni
## [1] "===== Change rate: 2/1 ====="
## [1] "Median for 3e-04 : 117"
## [1] "Median for 0.001 : 117"
## [1] "Median for 0.003 : 115.5"
## [1] "Median for 0.01 : 113"
## [1] "Median for 0.03 : 64.5"
## [1] "Median for 0.1 : 27"
##
## Kruskal-Wallis rank sum test
##
## data: coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 423.99, df = 5, p-value < 2.2e-16
##
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: df$coding_sites and df$BIT_FLIP_PROB
##
## 3e-04 0.001 0.003 0.01 0.03
## 0.001 1.0000 - - - -
## 0.003 1.0000 1.0000 - - -
## 0.01 0.0075 0.0022 0.4187 - -
## 0.03 <2e-16 <2e-16 <2e-16 <2e-16 -
## 0.1 <2e-16 <2e-16 <2e-16 <2e-16 <2e-16
##
## P value adjustment method: bonferroni
## [1] "===== Change rate: 64/1 ====="
## [1] "Median for 3e-04 : 123"
## [1] "Median for 0.001 : 123"
## [1] "Median for 0.003 : 123"
## [1] "Median for 0.01 : 120.5"
## [1] "Median for 0.03 : 75"
## [1] "Median for 0.1 : 13"
##
## Kruskal-Wallis rank sum test
##
## data: coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 423.98, df = 5, p-value < 2.2e-16
##
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: df$coding_sites and df$BIT_FLIP_PROB
##
## 3e-04 0.001 0.003 0.01 0.03
## 0.001 1.00000 - - - -
## 0.003 1.00000 1.00000 - - -
## 0.01 0.00028 0.00254 0.05478 - -
## 0.03 < 2e-16 < 2e-16 < 2e-16 < 2e-16 -
## 0.1 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
##
## P value adjustment method: bonferroni
## [1] "===== Change rate: 1024/1 ====="
## [1] "Median for 3e-04 : 108"
## [1] "Median for 0.001 : 102"
## [1] "Median for 0.003 : 97.5"
## [1] "Median for 0.01 : 97.5"
## [1] "Median for 0.03 : 91.5"
## [1] "Median for 0.1 : 16"
##
## Kruskal-Wallis rank sum test
##
## data: coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 268.94, df = 5, p-value < 2.2e-16
##
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: df$coding_sites and df$BIT_FLIP_PROB
##
## 3e-04 0.001 0.003 0.01 0.03
## 0.001 0.021 - - - -
## 0.003 3.3e-06 0.485 - - -
## 0.01 3.2e-06 0.417 1.000 - -
## 0.03 8.8e-13 9.9e-06 0.024 0.105 -
## 0.1 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
##
## P value adjustment method: bonferroni
ggplot(data_gradient_phase0,
aes(x=BIT_FLIP_PROB, y=genome_length, color=BIT_FLIP_PROB)) +
geom_boxplot() +
xlab("Bit Flip Rate") +
scale_y_continuous(name="Genome Length (in best organism)",
limits=c(0, 1025)) +
facet_wrap(~ chg_rate_label) +
ggtitle("Gradient Fitness Model") +
theme(legend.position="none",
axis.text.x=element_text(angle=90))
ggplot(data_nk_phase0,
aes(x=BIT_FLIP_PROB, y=genome_length, color=BIT_FLIP_PROB)) +
geom_boxplot() +
xlab("Bit Flip Rate") +
scale_y_continuous(name="Genome Length (in best organism)",
limits=c(0, 1025)) +
facet_wrap(~ chg_rate_label) +
ggtitle("NK Fitness Model") +
theme(legend.position="none",
axis.text.x=element_text(angle=90))
ggplot(data_gradient_phase0,
aes(x=BIT_FLIP_PROB, y=neutral_sites, color=BIT_FLIP_PROB)) +
geom_boxplot() +
xlab("Bit Flip Rate") +
scale_y_continuous(name="Neutral Sites (in best organism)") +
facet_wrap(~ chg_rate_label) +
ggtitle("Gradient Fitness Model") +
theme(legend.position="none",
axis.text.x=element_text(angle=90))
ggplot(data_nk_phase0,
aes(x=BIT_FLIP_PROB, y=neutral_sites, color=BIT_FLIP_PROB)) +
geom_boxplot() +
xlab("Bit Flip Rate") +
scale_y_continuous(name="Neutral Sites (in best organism)") +
facet_wrap(~ chg_rate_label) +
ggtitle("NK Fitness Model") +
theme(legend.position="none",
axis.text.x=element_text(angle=90))
ggplot(data_gradient_phase1,
aes(x=BIT_FLIP_PROB, y=fitness, color=BIT_FLIP_PROB)) +
geom_boxplot() +
xlab("Bit Flip Rate") +
scale_y_continuous(name="Fitness") +
facet_wrap(~ chg_rate_label) +
ggtitle("Gradient Fitness Model") +
theme(legend.position="none",
axis.text.x=element_text(angle=90))
ggplot(data_nk_phase1,
aes(x=BIT_FLIP_PROB, y=fitness, color=BIT_FLIP_PROB)) +
geom_boxplot() +
xlab("Bit Flip Rate") +
scale_y_continuous(name="Fitness") +
facet_wrap(~ chg_rate_label) +
ggtitle("NK Fitness Model") +
theme(legend.position="none",
axis.text.x=element_text(angle=90))
ggplot(data_gradient_phase1,
aes(x=coding_sites, y=fitness, color=chg_rate_label)) +
geom_jitter() +
xlab("Coding Sites") +
scale_y_continuous(name="Fitness") +
facet_wrap(~ BIT_FLIP_PROB) +
ggtitle("Gradient Fitness Model") +
theme(legend.position = "bottom")
ggplot(data_nk_phase1,
aes(x=coding_sites, y=fitness, color=chg_rate_label)) +
geom_jitter() +
xlab("Coding Sites") +
scale_y_continuous(name="Fitness") +
facet_wrap(~ BIT_FLIP_PROB) +
ggtitle("NK Fitness Model") +
theme(legend.position = "bottom")
rates = c("drift", "0", "1/4")
labels <- c(
"drift"="Drift",
"0"="Static",
"1/4"="Changing Env. (1/4)"
)
ggplot(filter(data_gradient_phase0, chg_rate_label %in% rates),
aes(x=BIT_FLIP_PROB, y=coding_sites, fill=BIT_FLIP_PROB)) +
geom_boxplot() +
xlab("Bit Flip Mutation Rate") +
scale_y_continuous(name="Coding Sites\n(in best organism)",
limits=c(0, 130),
breaks=seq(0, 130, 20)) +
facet_wrap(~ chg_rate_label, labeller = labeller(chg_rate_label=labels)) +
theme(legend.position="none",
axis.text.x=element_text(angle=90)) +
ggsave("./imgs/coding-sites-v-mutation-gradient-phase0.pdf", width=10, height=7)
env_sim_data_path <- "./data/env_sim_gene_overlap.csv"
env_sim_data <- read.csv(env_sim_data_path, na.strings="NONE")
env_sim_data <- filter(env_sim_data, GRADIENT_MODEL==1)
env_sim_data$BIT_FLIP_PROB <- as.factor(env_sim_data$BIT_FLIP_PROB)
env_sim_data$DRIFT <- env_sim_data$TOURNAMENT_SIZE==1
env_sim_data$chg_rate_label <- factor(mapply(chg_rate_label,
env_sim_data$CHANGE_MAGNITUDE,
env_sim_data$CHANGE_FREQUENCY,
env_sim_data$DRIFT),
levels=c("drift", "0", "1/256", "1/128",
"1/64", "1/32", "1/16", "1/8",
"1/4", "1/2", "1/1", "2/1",
"4/1", "8/1", "16/1", "32/1",
"64/1", "128/1", "256/1",
"512/1", "1024/1", "2048/1",
"4096/1"))
env_sim_data$gene_pair_target_similarity_factor <-
as.factor(env_sim_data$gene_pair_target_similarity)
env_sim_data$gene_pair_overlap_factor <-
as.factor(env_sim_data$gene_pair_overlap)
env_sim_data$gene_pair_target_max_alignment_similarity_factor <-
as.factor(env_sim_data$gene_pair_target_max_alignment_similarity)
env_sim_data_selection <- filter(env_sim_data, TOURNAMENT_SIZE==8)
env_sim_data_drift <- filter(env_sim_data, TOURNAMENT_SIZE==1)
env_sim_data_selection_static <- filter(env_sim_data_selection, chg_rate_label=="0")
We’ll look at just the static environment.
For each replicate, we measure the pairwise gene target similarity (1 - Hamming Distance). I.e., we measure the number of bits that match between each pair of gene targets (within a run).
These measures are for naively aligned gene targets.
# Occurences of environment similarity (static environment)
a <-
ggplot(env_sim_data_selection_static,
aes(x=gene_pair_target_similarity_factor)) +
geom_histogram(stat="count") +
ylab("Occurences") +
ylim(0, 6000) +
xlab("Pairwise gene target similarity") +
facet_wrap(~BIT_FLIP_PROB) +
ggtitle("Selection")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
b <-
ggplot(env_sim_data_drift, aes(x=gene_pair_target_similarity_factor)) +
geom_histogram(stat="count") +
ylab("Occurences") +
ylim(0, 6000) +
xlab("Pairwise gene target similarity") +
facet_wrap(~BIT_FLIP_PROB) +
ggtitle("Drift")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
(a | b)
As expected, no differences between drift and selection conditions (no reason to expect there to be a difference).
Here, for each pair of gene targets (within a run), we measure the best hamming similarity across all possible alignments of two gene targets.
# Occurences of environment similarities (given max alignment)
a <-
ggplot(env_sim_data_selection_static,
aes(x=gene_pair_target_max_alignment_similarity_factor)) +
geom_histogram(stat="count") +
ylab("Occurences") +
ylim(0, 6000) +
xlab("Pairwise gene target similarity\n(best alignment)") +
facet_wrap(~BIT_FLIP_PROB) +
ggtitle("Selection")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
b <-
ggplot(env_sim_data_drift,
aes(x=gene_pair_target_max_alignment_similarity_factor)) +
geom_histogram(stat="count") +
ylab("Occurences") +
xlab("Pairwise gene target similarity\n(best_alignment)") +
ylim(0, 6000) +
facet_wrap(~BIT_FLIP_PROB) +
ggtitle("Drift")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
(a | b)
Using the best-alignment, shifts the distribution of observed gene target similarities to the right.
For each evolved genetic architecture, we measure the pairwise gene overlap among each of the 16 genes.
ggplot(filter(env_sim_data_drift),
aes(x=gene_pair_overlap_factor)) +
geom_histogram(stat="count") +
ylab("Occurences") +
xlab("Pairwise gene overlap") +
facet_wrap(~BIT_FLIP_PROB) +
ggtitle("Drift")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
ggplot(filter(env_sim_data_drift, gene_pair_overlap>0),
aes(x=gene_pair_overlap_factor)) +
geom_histogram(stat="count") +
ylab("Occurences") +
xlab("Pairwise gene overlap (excluding 0)") +
facet_wrap(~BIT_FLIP_PROB) +
ggtitle("Drift")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
ggplot(env_sim_data_selection_static,
aes(x=gene_pair_overlap_factor)) +
geom_histogram(stat="count") +
ylab("Occurences") +
xlab("Pairwise gene overlap") +
facet_wrap(~BIT_FLIP_PROB) +
ggtitle("Static Environment")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
ggplot(filter(env_sim_data_selection_static, gene_pair_overlap>0),
aes(x=gene_pair_overlap_factor)) +
geom_histogram(stat="count") +
ylab("Occurences") +
xlab("Pairwise gene overlap (excluding 0)") +
facet_wrap(~BIT_FLIP_PROB) +
ggtitle("Static Environment")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
ggplot(filter(env_sim_data_selection, chg_rate_label=="1/128"),
aes(x=gene_pair_overlap_factor)) +
geom_histogram(stat="count") +
ylab("Occurences") +
xlab("Pairwise gene overlap") +
facet_wrap(~BIT_FLIP_PROB) +
ggtitle("Changing Environment (1/128)")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
ggplot(filter(env_sim_data_selection, chg_rate_label=="1/128" & gene_pair_overlap > 0),
aes(x=gene_pair_overlap_factor)) +
geom_histogram(stat="count") +
ylab("Occurences") +
xlab("Pairwise gene overlap (excluding 0)") +
facet_wrap(~BIT_FLIP_PROB) +
ggtitle("Changing Environment (1/128)")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
ggplot(filter(env_sim_data_selection, chg_rate_label=="1/4"),
aes(x=gene_pair_overlap_factor)) +
geom_histogram(stat="count") +
ylab("Occurences") +
xlab("Pairwise gene overlap") +
facet_wrap(~BIT_FLIP_PROB) +
ggtitle("Changing Environment (1/4)")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
ggplot(filter(env_sim_data_selection, chg_rate_label=="1/4" & gene_pair_overlap > 0),
aes(x=gene_pair_overlap_factor)) +
geom_histogram(stat="count") +
ylab("Occurences") +
xlab("Pairwise gene overlap (excluding 0)") +
facet_wrap(~BIT_FLIP_PROB) +
ggtitle("Changing Environment (1/4)")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
ggplot(filter(env_sim_data_selection, chg_rate_label=="4/1"),
aes(x=gene_pair_overlap_factor)) +
geom_histogram(stat="count") +
ylab("Occurences") +
xlab("Pairwise gene overlap") +
facet_wrap(~BIT_FLIP_PROB) +
ggtitle("Changing Environment (4/1)")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
ggplot(filter(env_sim_data_selection, chg_rate_label=="4/1" & gene_pair_overlap > 0),
aes(x=gene_pair_overlap_factor)) +
geom_histogram(stat="count") +
ylab("Occurences") +
xlab("Pairwise gene overlap (excluding 0)") +
facet_wrap(~BIT_FLIP_PROB) +
ggtitle("Changing Environment (4/1)")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
ggplot(env_sim_data,
aes(x=gene_pair_overlap_factor)) +
geom_histogram(stat="count") +
ylab("Occurences") +
xlab("Pairwise gene overlap") +
facet_grid(BIT_FLIP_PROB~chg_rate_label)
## Warning: Ignoring unknown parameters: binwidth, bins, pad
ggplot(filter(env_sim_data, gene_pair_overlap>0),
aes(x=gene_pair_overlap_factor)) +
geom_histogram(stat="count") +
ylab("Occurences") +
xlab("Pairwise gene overlap (excluding 0)") +
facet_grid(BIT_FLIP_PROB~chg_rate_label)
## Warning: Ignoring unknown parameters: binwidth, bins, pad
Does best-alignment gene target similarity predict gene overlap amount?
ggplot(env_sim_data_drift,
aes(x=gene_pair_target_max_alignment_similarity,
y=gene_pair_overlap)) +
geom_bin2d(binwidth=1) +
scale_fill_continuous(type ="viridis",
trans="log",
name="Count",
breaks=c(1, 10, 100, 1000)) +
scale_y_continuous(name="Gene Pair Overlap",
breaks=seq(0, 9)) +
scale_x_continuous(name="target similarity (best alignment)",
breaks=seq(0, 9)) +
facet_wrap(~BIT_FLIP_PROB)
Exclude 0 overlap
ggplot(filter(env_sim_data_drift, gene_pair_overlap > 0),
aes(x=gene_pair_target_max_alignment_similarity,
y=gene_pair_overlap)) +
geom_bin2d(binwidth=1) +
scale_fill_continuous(type ="viridis",
trans="log",
name="Count",
breaks=c(1, 10, 100, 1000)) +
scale_y_continuous(name="Gene Pair Overlap",
breaks=seq(0, 9)) +
scale_x_continuous(name="target similarity (best alignment)",
breaks=seq(0, 9)) +
facet_wrap(~BIT_FLIP_PROB)
ggplot(env_sim_data_selection_static,
aes(x=gene_pair_target_max_alignment_similarity,
y=gene_pair_overlap)) +
geom_bin2d(binwidth=1) +
scale_fill_continuous(type ="viridis",
trans="log",
name="Count",
breaks=c(1, 10, 100, 1000, 10000)) +
scale_y_continuous(name="Gene Pair Overlap",
breaks=seq(0, 9)) +
scale_x_continuous(name="target similarity (best alignment)",
breaks=seq(0, 9)) +
facet_wrap(~BIT_FLIP_PROB)
ggplot(filter(env_sim_data_selection_static, gene_pair_overlap > 0),
aes(x=gene_pair_target_max_alignment_similarity,
y=gene_pair_overlap)) +
geom_bin2d(binwidth=1) +
scale_fill_continuous(type ="viridis",
trans="log",
name="Count",
breaks=c(1, 10, 100, 1000, 10000)) +
scale_y_continuous(name="Gene Pair Overlap",
breaks=seq(0, 9)) +
scale_x_continuous(name="target similarity (best alignment)",
breaks=seq(0, 9)) +
facet_wrap(~BIT_FLIP_PROB)
Compute proportions
mut_rates <- levels(env_sim_data$BIT_FLIP_PROB)
# chg_rates <- levels(env_sim_data$chg_rate_label)
chg_rates <- c("drift", "0")
gene_overlap_levels <-
levels(env_sim_data$gene_pair_overlap_factor)
gene_target_best_alignment_sim_levels <-
levels(env_sim_data$gene_pair_target_max_alignment_similarity_factor)
overlap_proportions <-
read.csv(text="mutation_rate,chg_rate_label,gene_target_sim_level,gene_overlap_level,gene_overlap_prop,gene_overlap_cnt")
for (mut_rate in mut_rates) {
for (chg_rate in chg_rates) {
for (sim in gene_target_best_alignment_sim_levels) {
# For each similarity level, compute proportion for each level of gene overlap
d <- filter(env_sim_data,
BIT_FLIP_PROB==mut_rate &
chg_rate_label==chg_rate &
gene_pair_target_max_alignment_similarity_factor==sim)
total <- length(d$update) # How many total observations at this similarity level?
for (overlap in gene_overlap_levels) {
overlap_proportion <- 0
# How many are at this overlap level?
overlap_cnt <- length(filter(d, gene_pair_overlap_factor==overlap)$update)
if (total > 0) {
overlap_proportion <- overlap_cnt / total
} else {
overlap_proportion <- 0
}
new_row <- data.frame(mutation_rate=c(mut_rate),
chg_rate_label=c(chg_rate),
gene_target_sim_level=c(sim),
gene_overlap_level=c(overlap),
gene_overlap_prop=c(overlap_proportion),
gene_overlap_cnt=c(overlap_cnt))
overlap_proportions <- rbind(overlap_proportions, new_row)
}
}
}
}
ggplot(overlap_proportions,
aes(x=gene_target_sim_level, y=gene_overlap_level, fill=gene_overlap_prop)) +
geom_tile() +
xlab("Gene Target Similarity (best alignment)") +
scale_fill_continuous(type ="viridis") +
facet_grid(mutation_rate~chg_rate_label) +
ggsave("./imgs/gene_overlap_proportions.pdf", width=10, height=20)
ggplot(overlap_proportions,
aes(x=gene_target_sim_level, y=gene_overlap_prop, fill=gene_overlap_level)) +
geom_bar(position="fill", stat="identity") +
xlab("Gene Target Similarity (best alignment)") +
ylab("Proportion of Pairs") +
scale_fill_discrete(name="Gene Overlap Amount") +
facet_grid(mutation_rate~chg_rate_label) +
theme(legend.position = "top") +
ggsave("./imgs/gene_overlap_proportions_stackedbar.pdf", width=10, height=20)
## Warning: Removed 90 rows containing missing values (geom_bar).
## Warning: Removed 90 rows containing missing values (geom_bar).
### Collect proportion non-zero
overlap_proportions_non0 <-
read.csv(text="mutation_rate,chg_rate_label,gene_target_sim_level,gene_overlap_level,gene_overlap_prop,gene_overlap_cnt")
for (mut_rate in mut_rates) {
for (chg_rate in chg_rates) {
for (sim in gene_target_best_alignment_sim_levels) {
# For each similarity level, compute proportion for each level of gene overlap
d <- filter(env_sim_data,
BIT_FLIP_PROB==mut_rate &
chg_rate_label==chg_rate &
gene_pair_target_max_alignment_similarity_factor==sim &
gene_pair_overlap!=0)
total <- length(d$update) # How many total observations at this similarity level?
for (overlap in gene_overlap_levels) {
if (overlap=="0") { next() }
overlap_proportion <- 0
# How many are at this overlap level?
overlap_cnt <- length(filter(d, gene_pair_overlap_factor==overlap)$update)
if (total > 0) {
overlap_proportion <- overlap_cnt / total
} else {
overlap_proportion <- 0
}
new_row <- data.frame(mutation_rate=c(mut_rate),
chg_rate_label=c(chg_rate),
gene_target_sim_level=c(sim),
gene_overlap_level=c(overlap),
gene_overlap_prop=c(overlap_proportion),
gene_overlap_cnt=c(overlap_cnt))
overlap_proportions_non0 <- rbind(overlap_proportions_non0, new_row)
}
}
}
}
ggplot(overlap_proportions_non0,
aes(x=gene_target_sim_level, y=gene_overlap_level, fill=gene_overlap_prop)) +
geom_tile() +
xlab("Gene Target Similarity (best alignment)") +
scale_fill_continuous(type ="viridis") +
facet_grid(mutation_rate~chg_rate_label) +
ggsave("./imgs/gene_overlap_proportions_non0.pdf", width=10, height=20)
ggplot(overlap_proportions_non0,
aes(x=gene_target_sim_level, y=gene_overlap_prop, fill=gene_overlap_level)) +
geom_bar(position="fill", stat="identity") +
xlab("Gene Target Similarity (best alignment)") +
scale_fill_viridis(discrete = TRUE) +
facet_grid(mutation_rate~chg_rate_label) +
ggsave("./imgs/gene_overlap_proportions_stackedbar_non0.pdf", width=10, height=20)
## Warning: Removed 152 rows containing missing values (geom_bar).
## Warning: Removed 152 rows containing missing values (geom_bar).